Climate Data: CO2 and Snow Accumulation¶

This lab accompanies a lecture for UC Berkeley's Data 100 by Fernando Pérez and Dr. Chelle Gentemann that covers the fundamental physical mechanisms behind global warming and analyzes CO2 and ocean temperature data.

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import plot, iplot, init_notebook_mode
from scipy.interpolate import CubicSpline

import xarray as xr
from zipfile import ZipFile
import warnings
warnings.filterwarnings('ignore')

plt.rcParams["figure.figsize"] = (8, 6)
plt.rcParams["font.size"] = 14
plt.rcParams["figure.dpi"] = 600

from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

Mauna Loa CO2¶

Load Data¶

In [2]:
# data = pd.read_csv("./data/mauna_loa_co2.csv", na_values=-99.99).dropna()
# data.head(5)
In [3]:
filename = 'https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/in_situ_co2/monthly/monthly_in_situ_co2_mlo.csv'
mlo = pd.read_csv(filename, skiprows=59, na_values=-99.99).reset_index().dropna()
mlo.columns = ['year', 'month', 'date_index', 'fraction_date', 'c02',
               'data_adjusted_season', 'data_fit', 
               'data_adjusted_seasonally_fit','data_filled', 
               'data_adjusted_seasonally_filed', 'station']
mlo.head(2)
Out[3]:
year month date_index fraction_date c02 data_adjusted_season data_fit data_adjusted_seasonally_fit data_filled data_adjusted_seasonally_filed station
2 1958 3 21259 1958.2027 315.71 314.44 316.19 314.90 315.71 314.44 MLO
3 1958 4 21290 1958.2877 317.45 315.16 317.30 314.98 317.45 315.16 MLO

Exploratory Data Analysis¶

Variability in Annual Cycle¶

Plants take up CO2 in the spring/summer then release it in the fall/winter

In [4]:
trace1 = go.Scatter(x=mlo['fraction_date'], y=mlo['c02'])
trace2 = go.Scatter(x=mlo['fraction_date'], y=mlo['data_adjusted_seasonally_fit'])

fig = make_subplots()
fig.add_trace(trace1)
fig.add_trace(trace2)
fig['layout'].update(height=600, width=700, showlegend=False,
                     title='Atmospheric CO2 at Mauna Loa Observatory',
                     xaxis_title = 'Year', 
                     yaxis_title='CO2 fraction in dry air (ppm)')
iplot(fig)

# plt.plot(data=mlo, 'fraction_date', 'c02');
# plt.plot(data=mlo, 'fraction_date', 'data_adjusted_seasonally_fit');
# plt.title('Atmospheric CO2 at Mauna Loa Observatory');
# plt.xlabel('Year');
# plt.ylabel('CO2 fraction in dry air (ppm)');

Monthly Cycle For All Years¶

CO2 emission follows a similar monthly cycle each year. There is a slight increase around May and a dip around October.

In [5]:
sns.lineplot(data=mlo, x="month", y="c02", hue="year");
plt.legend(bbox_to_anchor=(1, 1), fontsize=10);
plt.xlabel('Month', fontsize=10);
plt.ylabel('CO2 fraction in dry air (ppm)', fontsize=10);
plt.title('Monthly CO2 Cycle by Year', fontsize=15);
plt.tick_params(axis='both', labelsize=10);

Estimate Increase in Amplitude of Annual Cycle¶

In [6]:
annual = mlo.groupby('month').mean()
anomaly = annual - annual.mean()

def detrend(df):
    temp = df - df.mean()
    temp['month'] = df['month']
    return temp

c02anomaly = mlo.drop(columns='station').groupby('year').apply(detrend)
monthly_anomaly = c02anomaly.groupby('month').mean()[['c02']]
# display(monthly_anomaly)

fig, ax = plt.subplots()
ax.plot(mlo['fraction_date'], mlo['data_filled'], 'r.', markersize=2);
ax.plot(mlo['fraction_date'], mlo['data_adjusted_seasonally_fit'], 'b');
ax.set_xlabel('Year', fontsize=10)
ax.set_ylabel('CO2 fraction in dry air (ppm)', fontsize=10)
ax.set_title('Monthly Mean CO2', fontsize=15)
ax.tick_params(axis='both', labelsize=10);
ax.grid(False)

axin1 = ax.inset_axes([0.1, 0.6, 0.3, 0.3])
cs = CubicSpline(monthly_anomaly.index.values, monthly_anomaly.c02)
xs = np.arange(1, 12.1, 0.1)
axin1.plot(monthly_anomaly.index, monthly_anomaly.c02, 'r.', markersize=8);
axin1.plot(xs, cs(xs));
axin1.set_xlabel('Month', fontsize=8)
axin1.set_ylabel('CO2 fraction in dry air (ppm)', fontsize=8);
axin1.set_title('Seasonal Anomaly', fontsize=10);
axin1.tick_params(axis='both', labelsize=8);
axin1.set_xticks(np.arange(2, 13, 2));

ERA5: Snow Density¶

Download and Clean Data¶

  • Product type: Monthly averaged reanalysis
  • Variable: Land-sea mask, Snow density
  • Year: 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019
  • Month: January, February, March, April, May, June, July, August, September, October, November, December
  • Time: 00:00
In [7]:
# files = ZipFile('data/era5_snow_monthly.nc.zip')
# ds = xr.open_dataset(files.open(files.namelist()[0]))
# # ds
# mask = ds.lsm.mean('time') # {0: sea, 1: land, other: assume land}
# snow = ds.rsn.where(mask > 0, drop=True)

# snow = snow.interp(coords={'latitude': snow.latitude[0::7], 
#                            'longitude': snow.longitude[0::8]})

# snow.to_netcdf('data/era5_snow.nc')
# # snow

Load Data¶

In [8]:
files = ZipFile('data/era5_snow.nc.zip')
snow = xr.open_dataset(files.open(files.namelist()[0])).rsn
snow
Out[8]:
<xarray.DataArray 'rsn' (time: 120, latitude: 99, longitude: 180)>
[2138400 values with dtype=float32]
Coordinates:
  * time       (time) datetime64[ns] 2010-01-01 2010-02-01 ... 2019-12-01
  * latitude   (latitude) float32 83.75 82.0 80.25 78.5 ... -85.0 -86.75 -88.5
  * longitude  (longitude) float32 0.0 2.0 4.0 6.0 ... 352.0 354.0 356.0 358.0
Attributes:
    units:      kg m**-3
    long_name:  Snow density
xarray.DataArray
'rsn'
  • time: 120
  • latitude: 99
  • longitude: 180
  • ...
    [2138400 values with dtype=float32]
    • time
      (time)
      datetime64[ns]
      2010-01-01 ... 2019-12-01
      long_name :
      time
      array(['2010-01-01T00:00:00.000000000', '2010-02-01T00:00:00.000000000',
             '2010-03-01T00:00:00.000000000', '2010-04-01T00:00:00.000000000',
             '2010-05-01T00:00:00.000000000', '2010-06-01T00:00:00.000000000',
             '2010-07-01T00:00:00.000000000', '2010-08-01T00:00:00.000000000',
             '2010-09-01T00:00:00.000000000', '2010-10-01T00:00:00.000000000',
             '2010-11-01T00:00:00.000000000', '2010-12-01T00:00:00.000000000',
             '2011-01-01T00:00:00.000000000', '2011-02-01T00:00:00.000000000',
             '2011-03-01T00:00:00.000000000', '2011-04-01T00:00:00.000000000',
             '2011-05-01T00:00:00.000000000', '2011-06-01T00:00:00.000000000',
             '2011-07-01T00:00:00.000000000', '2011-08-01T00:00:00.000000000',
             '2011-09-01T00:00:00.000000000', '2011-10-01T00:00:00.000000000',
             '2011-11-01T00:00:00.000000000', '2011-12-01T00:00:00.000000000',
             '2012-01-01T00:00:00.000000000', '2012-02-01T00:00:00.000000000',
             '2012-03-01T00:00:00.000000000', '2012-04-01T00:00:00.000000000',
             '2012-05-01T00:00:00.000000000', '2012-06-01T00:00:00.000000000',
             '2012-07-01T00:00:00.000000000', '2012-08-01T00:00:00.000000000',
             '2012-09-01T00:00:00.000000000', '2012-10-01T00:00:00.000000000',
             '2012-11-01T00:00:00.000000000', '2012-12-01T00:00:00.000000000',
             '2013-01-01T00:00:00.000000000', '2013-02-01T00:00:00.000000000',
             '2013-03-01T00:00:00.000000000', '2013-04-01T00:00:00.000000000',
             '2013-05-01T00:00:00.000000000', '2013-06-01T00:00:00.000000000',
             '2013-07-01T00:00:00.000000000', '2013-08-01T00:00:00.000000000',
             '2013-09-01T00:00:00.000000000', '2013-10-01T00:00:00.000000000',
             '2013-11-01T00:00:00.000000000', '2013-12-01T00:00:00.000000000',
             '2014-01-01T00:00:00.000000000', '2014-02-01T00:00:00.000000000',
             '2014-03-01T00:00:00.000000000', '2014-04-01T00:00:00.000000000',
             '2014-05-01T00:00:00.000000000', '2014-06-01T00:00:00.000000000',
             '2014-07-01T00:00:00.000000000', '2014-08-01T00:00:00.000000000',
             '2014-09-01T00:00:00.000000000', '2014-10-01T00:00:00.000000000',
             '2014-11-01T00:00:00.000000000', '2014-12-01T00:00:00.000000000',
             '2015-01-01T00:00:00.000000000', '2015-02-01T00:00:00.000000000',
             '2015-03-01T00:00:00.000000000', '2015-04-01T00:00:00.000000000',
             '2015-05-01T00:00:00.000000000', '2015-06-01T00:00:00.000000000',
             '2015-07-01T00:00:00.000000000', '2015-08-01T00:00:00.000000000',
             '2015-09-01T00:00:00.000000000', '2015-10-01T00:00:00.000000000',
             '2015-11-01T00:00:00.000000000', '2015-12-01T00:00:00.000000000',
             '2016-01-01T00:00:00.000000000', '2016-02-01T00:00:00.000000000',
             '2016-03-01T00:00:00.000000000', '2016-04-01T00:00:00.000000000',
             '2016-05-01T00:00:00.000000000', '2016-06-01T00:00:00.000000000',
             '2016-07-01T00:00:00.000000000', '2016-08-01T00:00:00.000000000',
             '2016-09-01T00:00:00.000000000', '2016-10-01T00:00:00.000000000',
             '2016-11-01T00:00:00.000000000', '2016-12-01T00:00:00.000000000',
             '2017-01-01T00:00:00.000000000', '2017-02-01T00:00:00.000000000',
             '2017-03-01T00:00:00.000000000', '2017-04-01T00:00:00.000000000',
             '2017-05-01T00:00:00.000000000', '2017-06-01T00:00:00.000000000',
             '2017-07-01T00:00:00.000000000', '2017-08-01T00:00:00.000000000',
             '2017-09-01T00:00:00.000000000', '2017-10-01T00:00:00.000000000',
             '2017-11-01T00:00:00.000000000', '2017-12-01T00:00:00.000000000',
             '2018-01-01T00:00:00.000000000', '2018-02-01T00:00:00.000000000',
             '2018-03-01T00:00:00.000000000', '2018-04-01T00:00:00.000000000',
             '2018-05-01T00:00:00.000000000', '2018-06-01T00:00:00.000000000',
             '2018-07-01T00:00:00.000000000', '2018-08-01T00:00:00.000000000',
             '2018-09-01T00:00:00.000000000', '2018-10-01T00:00:00.000000000',
             '2018-11-01T00:00:00.000000000', '2018-12-01T00:00:00.000000000',
             '2019-01-01T00:00:00.000000000', '2019-02-01T00:00:00.000000000',
             '2019-03-01T00:00:00.000000000', '2019-04-01T00:00:00.000000000',
             '2019-05-01T00:00:00.000000000', '2019-06-01T00:00:00.000000000',
             '2019-07-01T00:00:00.000000000', '2019-08-01T00:00:00.000000000',
             '2019-09-01T00:00:00.000000000', '2019-10-01T00:00:00.000000000',
             '2019-11-01T00:00:00.000000000', '2019-12-01T00:00:00.000000000'],
            dtype='datetime64[ns]')
    • latitude
      (latitude)
      float32
      83.75 82.0 80.25 ... -86.75 -88.5
      units :
      degrees_north
      long_name :
      latitude
      array([ 83.75,  82.  ,  80.25,  78.5 ,  76.75,  75.  ,  73.25,  71.5 ,  69.75,
              68.  ,  66.25,  64.5 ,  62.75,  61.  ,  59.25,  57.5 ,  55.75,  54.  ,
              52.25,  50.5 ,  48.75,  47.  ,  45.25,  43.5 ,  41.75,  40.  ,  38.25,
              36.5 ,  34.75,  33.  ,  31.25,  29.5 ,  27.75,  26.  ,  24.25,  22.5 ,
              20.75,  19.  ,  17.25,  15.5 ,  13.75,  12.  ,  10.25,   8.5 ,   6.75,
               5.  ,   3.25,   1.5 ,  -0.25,  -2.  ,  -3.75,  -5.5 ,  -7.25,  -9.  ,
             -10.75, -12.5 , -14.25, -16.  , -17.75, -19.5 , -21.25, -23.  , -24.75,
             -26.5 , -28.25, -30.  , -31.75, -33.5 , -35.25, -37.  , -38.75, -40.5 ,
             -42.25, -44.  , -45.75, -47.5 , -49.25, -51.  , -52.75, -54.5 , -56.25,
             -58.  , -60.5 , -62.25, -64.  , -65.75, -67.5 , -69.25, -71.  , -72.75,
             -74.5 , -76.25, -78.  , -79.75, -81.5 , -83.25, -85.  , -86.75, -88.5 ],
            dtype=float32)
    • longitude
      (longitude)
      float32
      0.0 2.0 4.0 ... 354.0 356.0 358.0
      units :
      degrees_east
      long_name :
      longitude
      array([  0.,   2.,   4.,   6.,   8.,  10.,  12.,  14.,  16.,  18.,  20.,  22.,
              24.,  26.,  28.,  30.,  32.,  34.,  36.,  38.,  40.,  42.,  44.,  46.,
              48.,  50.,  52.,  54.,  56.,  58.,  60.,  62.,  64.,  66.,  68.,  70.,
              72.,  74.,  76.,  78.,  80.,  82.,  84.,  86.,  88.,  90.,  92.,  94.,
              96.,  98., 100., 102., 104., 106., 108., 110., 112., 114., 116., 118.,
             120., 122., 124., 126., 128., 130., 132., 134., 136., 138., 140., 142.,
             144., 146., 148., 150., 152., 154., 156., 158., 160., 162., 164., 166.,
             168., 170., 172., 174., 176., 178., 180., 182., 184., 186., 188., 190.,
             192., 194., 196., 198., 200., 202., 204., 206., 208., 210., 212., 214.,
             216., 218., 220., 222., 224., 226., 228., 230., 232., 234., 236., 238.,
             240., 242., 244., 246., 248., 250., 252., 254., 256., 258., 260., 262.,
             264., 266., 268., 270., 272., 274., 276., 278., 280., 282., 284., 286.,
             288., 290., 292., 294., 296., 298., 300., 302., 304., 306., 308., 310.,
             312., 314., 316., 318., 320., 322., 324., 326., 328., 330., 332., 334.,
             336., 338., 340., 342., 344., 346., 348., 350., 352., 354., 356., 358.],
            dtype=float32)
  • units :
    kg m**-3
    long_name :
    Snow density

Exploratory Data Analysis¶

In [9]:
fig, axs = plt.subplots(ncols=2, figsize=(15, 7))

xr.plot.hist(snow, ax=axs[0]);
axs[0].set_title('Snow Density Distribution', fontsize=15);

snow.mean('time').plot(ax=axs[1])
axs[1].set_title('Average Density', fontsize=15);

Snow Accumulation Peaks in Summer and Winter of 2000¶

There is more snow across the globe in February from snowfall in the winter season and less in August from warm weather in the summer. Greenland and Antarctica's snow density was similar both times of the year

In [10]:
fig, axs = plt.subplots(ncols=2, figsize=(15, 7))

snow.sel(time='2010-02').plot(ax=axs[0]);
snow.sel(time='2010-08').plot(ax=axs[1]);

Peaks: Northern vs Southern Hemisphere¶

On average, snow density peaks at around April and is lowest around July/August in the Northern Hemisphere. Snow density stays relatively constant throughout the year, on average.

In [11]:
by_month = snow.groupby('time.month').mean()
nn = by_month.where(snow.latitude > 0, drop=True).mean(['latitude', 'longitude'])
ss = by_month.where(snow.latitude < 0, drop=True).mean(['latitude', 'longitude'])

trace1 = go.Scatter(x=nn.month, y=nn, name='north')
trace2 = go.Scatter(x=ss.month, y=ss, name='south')

fig = make_subplots()
fig.add_trace(trace1)
fig.add_trace(trace2)
fig['layout'].update(height=600, width=600, showlegend=True,
                     title="Average Monthly Density in the 2010's",
                     xaxis_title = 'Month', yaxis_title='Snow Density')
iplot(fig)

# nn.plot(label='north');
# ss.plot(label='south');
# plt.legend();

Max and Min Snow Density in Northern Hemisphere¶

Max density in April and fluctuates throughout the years, while min in July/August stays constant

In [12]:
import datetime as dt

def extract_peaks(snow_data):
    years = np.arange(2010, 2020).astype(str)
    peaks = []
    
    for y in years:
        snow_sel = snow_data.sel(time=y)
        min_month = pd.to_datetime(snow_sel.idxmin().values).month
        max_month = pd.to_datetime(snow_sel.idxmax().values).month
        min_dayofyear = int(snow_sel.idxmin().dt.dayofyear)
        max_dayofyear = int(snow_sel.idxmax().dt.dayofyear)
        min_snow = float(snow_sel.min())
        max_snow = float(snow_sel.max())
        amplitude = max_snow - min_snow
        vals = [y, min_month, max_month, min_snow, max_snow, amplitude]
        peaks.append(vals)
        
    return pd.DataFrame(peaks, columns = ['year', 'min_month', 'max_month', 
                                          'min_snow', 'max_snow', 'amplitude'])

peaks = extract_peaks(snow.where(snow.latitude > 0, drop=True).mean(['latitude', 'longitude']))
display(peaks)
year min_month max_month min_snow max_snow amplitude
0 2010 7 4 112.835518 155.768860 42.933342
1 2011 7 4 113.086121 162.643204 49.557083
2 2012 7 4 112.561478 156.796539 44.235062
3 2013 8 4 113.702126 160.700287 46.998161
4 2014 8 4 112.960968 159.668961 46.707993
5 2015 7 4 112.696663 163.593735 50.897072
6 2016 7 4 112.435371 159.232452 46.797081
7 2017 8 4 113.219757 164.261642 51.041885
8 2018 8 4 112.922371 162.334503 49.412132
9 2019 8 4 112.321518 163.467422 51.145905
In [13]:
trace1 = go.Scatter(x=peaks.year, y=peaks.min_snow)
trace2 = go.Scatter(x=peaks.year, y=peaks.max_snow)
# trace3 = go.Bar(x=peaks.year, y=peaks.amplitude)

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(trace1)
fig.add_trace(trace2)
# fig.add_trace(trace3,secondary_y=True)
fig['layout'].update(height=600, width=600, showlegend=False,
                     title="Average Monthly Density in the 2010's",
                     xaxis_title = 'Month', yaxis_title='Snow Density')
iplot(fig)

# plt.figure(figsize=(12, 12))
# plt.plot('year', 'min_snow', data=peaks);
# plt.plot('year', 'max_snow', data=peaks);
# plt.plot('year', 'min_snow', 'r.', color='black', data=peaks);
# plt.plot('year', 'max_snow', 'r.', color='black', data=peaks);
# plt.legend(['Minimum', 'Maximum']);
# plt.title('Min and Max Snow Accumulation');
# plt.xlabel('Year');
# plt.ylabel('Snow Density');
# plt.xticks(rotation=60);
In [ ]:
# %cd ../eugpoon.github.io 
from IPython.display import display, Javascript
display(Javascript('IPython.notebook.save_checkpoint();'))

!jupyter nbconvert  climate.ipynb --to html
%mv "climate.html" "../eugpoon.github.io/projects/"

display(Javascript('IPython.notebook.save_checkpoint();'))
In [ ]:
 
In [ ]: